========================================================
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# That's a lot of receipts with a 0 amount. Exclude the top 1% of
# contributions to zoom in a bit.
ggplotly(
  ggplot(data = pfc, aes(x = contb_receipt_amt)) +
    # State the default bin count explicitly so stat_bin() stops emitting its
    # "Pick better value with `binwidth`" message; the bars are unchanged.
    geom_histogram(bins = 30) +
    # Limit the x axis to [0, 99th percentile]. Rows outside that range are
    # dropped by ggplot2 (with a warning), not merely hidden. na.rm = TRUE
    # keeps quantile() from erroring if any amount is missing.
    xlim(0, quantile(pfc$contb_receipt_amt, 0.99, na.rm = TRUE))
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3032 rows containing non-finite values (stat_bin).
# Are there zip codes more likely to contribute?
# Map the column with a formula so plotly looks it up inside `data` and
# titles the x axis with the column name.
plot_ly(data = pfc, x = ~contbr_zip, type = "histogram")
# Zip codes are too specific. Are there cities more likely to contribute?
# Map the column with a formula so plotly looks it up inside `data` and
# titles the x axis with the column name.
plot_ly(data = pfc, x = ~contbr_city, type = "histogram")
# I see you, Denver. Next is Boulder, from which people can commute to work in Denver.
# Are there occupations that are more likely to contribute?
# Map the column with a formula so plotly looks it up inside `data` and
# titles the x axis with the column name.
plot_ly(data = pfc, x = ~contbr_occupation, type = "histogram")
## Warning: Ignoring 32 observations
# The largest group is retirees. I did not expect retirees to be donating so actively.
# Which candidate received more donations? I've heard Colorado is fairly purple.
# Map the column with a formula so plotly looks it up inside `data` and
# titles the x axis with the column name.
plot_ly(data = pfc, x = ~cand_nm, type = "histogram")
# Let's check that by party.
# Map the column with a formula so plotly looks it up inside `data` and
# titles the x axis with the column name.
plot_ly(data = pfc, x = ~cand_party, type = "histogram")